HW 02

Author

Nandakumar Kuthalaraja

if (!require("pacman")) 
  install.packages("pacman")
Loading required package: pacman
pacman::p_load(tidyverse, ggridges, glue, scales, ggthemes, openintro, ggrepel, dsbox, janitor, fs)

# wrap printed code output at 85 characters
options(width = 85)

# use the minimal ggplot2 theme (base font size 14) for every plot
ggplot2::theme_set(ggplot2::theme_minimal(base_size = 14))

# default figure settings applied by knitr to every chunk
knitr::opts_chunk$set(
  dpi = 300,             # higher dpi, sharper image
  fig.retina = 3,        # dpi multiplier for displaying HTML output on retina
  fig.align = "center",  # center align figures
  fig.width = 7,         # 7" width
  fig.asp = 0.618        # the golden ratio
)



All responses are in comments within the code.

1 - A new day, a new plot, a new geom

#read from dsbox pkg dataset
#glimpse(edibnb)

# Drop listings that are missing either the neighbourhood or the review score.
edibnb <- edibnb |>
  filter(!(is.na(neighbourhood) | is.na(review_scores_rating)))

# Median review score per neighbourhood, sorted from lowest to highest median.
median_rating <- arrange(
  summarise(
    group_by(edibnb, neighbourhood),
    median_review_score = median(review_scores_rating, na.rm = TRUE)
  ),
  median_review_score
)

# glimpse(median_rating)

# Turn neighbourhood into a factor whose levels follow the median ordering,
# so the ridges are drawn in order of median review score.
neighborhood_order <- mutate(
  edibnb,
  neighbourhood = factor(
    neighbourhood,
    levels = pull(median_rating, neighbourhood)
  )
)

# Ridge plot: one density ridge of review scores per neighbourhood.
neighborhood_order |>
  ggplot(aes(x = review_scores_rating, y = neighbourhood)) +
  geom_density_ridges(alpha = 0.7) +
  labs(
    title = "Airbnb Review of Edinburgh Neighborhood",
    subtitle = "DensityRidge Plot",
    x = "Review Score (0–100)",
    y = "Neighborhood"
  ) +
  theme(
    plot.subtitle = element_text(size = 8),
    legend.position = "none"
  )
Picking joint bandwidth of 1.23

This plot shows the distribution of Airbnb review scores across different neighborhoods in Edinburgh. Each ridge represents one neighborhood and illustrates how the review scores of listings in that area are spread out. The highest-scoring neighborhoods tend to have a “taller” distribution curve, implying a tighter spread of high review scores than in the bottom few neighborhoods.

2 - Foreign Connected PACs

# Paths of every file under "data" whose name matches "Foreign Connected PAC".
list_of_files <- fs::dir_ls("data", regexp = "Foreign Connected PAC")

# Read all matched files into one data frame; the `year` column records the
# path of the file each row came from.
pac <- list_of_files |>
  read_csv(id = "year")

# Clean the raw PAC data:
#   * standardise the column names,
#   * split "country/parent company" into two columns,
#   * turn `year` (the source file path, set by `read_csv(id = "year")`) into
#     the integer year that ends the file name,
#   * convert the dollar-formatted `repubs` / `dems` columns to numbers,
#   * drop `total`.
cleaned_pac <- pac |>
  clean_names() |>
  separate(
    country_of_origin_parent_company,
    into = c("country_of_origin", "parent_company"),
    sep = "/",
    # Split on the first "/" only: anything after it (including further "/"
    # characters) stays in parent_company instead of being dropped.
    extra = "merge"
  ) |>
  mutate(
    # Take the four digits immediately before ".csv" so that hyphens anywhere
    # else in the path cannot shift the result.
    year = as.integer(str_extract(year, "\\d{4}(?=\\.csv$)")),
    # Strip both "$" and thousands separators; removing only "$" would leave
    # values such as "1,000", which as.numeric() converts to NA.
    repubs = as.numeric(str_remove_all(repubs, "[$,]")),
    dems = as.numeric(str_remove_all(dems, "[$,]"))
  ) |>
  select(-total)

#glimpse(cleaned_pac)

# Reshape to one row per PAC, year and party: the `dems` and `repubs` columns
# become a `party` label column and an `amount` value column, and the party
# codes are replaced by their full names.
updated_pac_data <- cleaned_pac |>
  pivot_longer(
    cols = c(dems, repubs),
    names_to = "party",
    values_to = "amount"
  ) |>
  mutate(
    # Named lookup: any code other than "dems" / "repubs" would map to NA.
    party = unname(c(dems = "Democrat", repubs = "Republican")[party])
  )
# Yearly total contributed to each party by PACs whose country of origin is UK.
uk_rows <- function(df) filter(df, country_of_origin == "UK")
uk_pac <- summarise(
  group_by(uk_rows(updated_pac_data), year, party),
  total_amount = sum(amount),
  .groups = "drop"
)
rm(uk_rows)

#glimpse(uk_pac)

# Line chart of the yearly UK totals, one line per party, with the y axis
# expressed in millions of dollars.
uk_pac |>
  ggplot(aes(x = year, y = total_amount / 1000000, color = party)) +
  geom_line(linewidth = 1.1) +
  scale_color_manual(
    name = "Party",
    values = c("Democrat" = "blue", "Republican" = "red")
  ) +
  scale_y_continuous(labels = dollar_format(prefix = "$", suffix = "M")) +
  labs(
    title = "UK-connected PACs' Contributions to US political parties",
    x = "Year",
    y = "Total amount"
  )

# Yearly total contributed to each party by PACs whose country of origin is
# Mexico. `.groups = "drop"` returns an ungrouped tibble, matching the UK
# summary and avoiding summarise()'s regrouping message.
Mexico_pac <- updated_pac_data |>
  filter(country_of_origin == "Mexico") |>
  group_by(year, party) |>
  summarise(total_amount = sum(amount), .groups = "drop")

#glimpse(Mexico_pac)

# Line chart of the yearly Mexico totals, one line per party, with the y axis
# expressed in millions of dollars.
Mexico_pac |>
  ggplot(aes(x = year, y = total_amount / 1000000, color = party)) +
  geom_line(linewidth = 1.1) +
  scale_y_continuous(labels = dollar_format(prefix = "$", suffix = "M")) +
  scale_color_manual(
    name = "Party",
    values = c("Democrat" = "blue", "Republican" = "red")
  ) +
  labs(
    title = "Mexico-connected PACs'  Contributions to US political parties",
    x = "Year",
    y = "Total amount"
  )

Mexico was chosen for the follow-up plot. Unlike the UK, Mexico has traditionally been a small contributor in dollar terms. There also appears to be a sharp increase in contributions to Republicans around 2008–10, followed by a sharp decline in later years.

3 - Median housing prices in the US

4 - Expect More. Plot More.

5 - Mirror, mirror on the wall, who’s the ugliest of them all?